* Session setup: start from a clean state, make double the default storage
* type for new variables, and pin the interpreter version.
clear all
graph drop _all
set more off
set type double
version 16.1


* Remember the directory the do-file was started from.
* The previous form, -global curent_path c(pwd)-, stored the literal text
* "c(pwd)" rather than the directory; expanding `c(pwd)' at assignment time
* stores the actual path. The global's (misspelled) name is kept unchanged
* in case other do-files refer to it.
global curent_path "`c(pwd)'"
set seed 1

/* Load the bid data and attach the firm-ID data (one row per rid on both sides) */
use "../data/biddata020119.dta", clear
merge 1:1 rid using "../data/rid_address_license_firmID_020119.dta"
drop _merge

/* The only sample restriction is the period: fy_con in 2015-2017 */
keep if fy_con > 2014 & fy_con < 2018
* Disabled alternatives, kept for reference:
*   use ../data/sample_for_screening.dta
*   keep if FLAG == 0

/* Per-auction (id) minimum and maximum of sc_tech, and a flag for auctions
   whose maximum sc_tech is below 100 */
bys id: egen min_tech = min(sc_tech)
bys id: egen max_tech = max(sc_tech)
gen FLAG_score = 0
* The variable name is spelled out in full: the former "max_tec" only worked
* through variable abbreviation and fails under -set varabbrev off-.
replace FLAG_score = 1 if max_tech < 100
* Disabled filters, kept for reference:
*   drop if min_tech < 100 & max_tech > 100   (some bidders did not get the +100 added)
*   drop if max_tech > 250                    (drop price-only auctions)


/* Merge in tsrank from the firm-level test results */
merge 1:1 rid using "../firmleveltest/test_sample_with_results.dta"
	* The using file adds tsrank, regionfirmID, ftt, partic, regionfirmTG, mints1 and so on,
	* but the cluster analysis only uses tsrank, so the rest is dropped.
drop regionfirmID - mints1
/* The using data exclude some observations (e.g. FLAG = 1, or auctions with
   min_tech < 100 & max_tech > 100), so those rows come out of the merge with
   tsrank = . ; fill them from another row of the same firmID x region.
   Missing values sort last, so within each group the non-missing tsrank
   comes first and is carried forward. The missing(tsrank) condition restricts
   the replacement to rows that actually need filling; previously every row
   after the first was overwritten, which would silently change tsrank if it
   ever differed within a group. */
sort firmID region tsrank
*br firmID region tsrank _merge
by firmID region: replace tsrank = tsrank[_n - 1] if missing(tsrank) & _n > 1 & tsrank[_n-1] < .
drop _merge
format %40s ad* nm*

	/* Attach ring_firm from the ring list. This is an m:1 merge on firmID x region,
	   so no fill-in step like the one used for tsrank is needed. */
	/* NOTE(review): _merge is not inspected here, so rows that exist only in the
	   ring list (if any) stay in the data at this point - confirm this is intended. */
merge m:1 firmID region using "../data/ring_list020119.dta"
generate regionfirmID = 10 * firmID + region

/******************** how to compute clusters ***************************/
/* Option 1 (disabled): ring firms only */
	* keep if ring_firm == 1
/* Option 2 (active): every bidder in any auction in which a ring firm participates */
	bysort id: egen theauctions = max(ring_firm == 1)
	drop if theauctions != 1
**************************************************************************
/* Observations with FLAG = 1 are kept, and so are the auctions that the
   disabled min/max technical-score filters would have removed */
tabulate fy_con FLAG
tabulate fy_con FLAG_score


* tab pref if fy_con > 2013
/* firm_n numbers the rows of each regionfirmID, firm_N counts them, and
   firmIDX is a consecutive integer index over regionfirmID values.
   firm_N is not in the keep list below, so it is not saved. */
bysort regionfirmID: generate firm_n = _n
by regionfirmID: generate firm_N = _N
egen firmIDX = group(regionfirmID)

tabulate fy_con FLAG_score if !missing(firmIDX)
capture mkdir ../firmleveltest/Cluster
/* Save the bid-level map from firmIDX to firm identifiers and attributes */
keep tsrank firmIDX firmID regionfirmID firm_n id nm_firm ///
	ad_firm_new ring_firm pref city no_license region
rename ad_firm_new ad_firm
save "../firmleveltest/Cluster/firmIDX_map.dta", replace

use "../firmleveltest/Cluster/firmIDX_map.dta", clear
keep firmIDX regionfirmID firm_n id
save "../firmleveltest/Cluster/freq_matrix.dta", empty replace

/* Build the matrix of head-to-head counts: column col<k> holds, for each firm,
   the number of its bids placed in auctions where firm k also bid. The diagonal
   entry is the firm's own number of bids (its "meetings with itself"). */
generate firmIDXX = firmIDX
quietly summarize firmIDX
local n_firms = r(max)
forvalues k = 1/`n_firms' {
	capture drop theauctions
	quietly count
	display as result "# Round `k'/`n_firms'; obs. left:`r(N)'"
	* Mark every row of the auctions in which firm k still has an active row
	bysort id: egen theauctions = max(firmIDXX == `k')
	* For each still-active firm, count its rows that fall in those auctions
	quietly bysort firmIDXX: egen col`k' = total(theauctions) if !missing(firmIDXX)
	/* For speed, a firm that has been processed keeps only one row and is
	   deactivated (firmIDXX set to missing). Later columns are therefore
	   missing for earlier firms, which yields a lower-triangular matrix;
	   it is made symmetric afterwards with Mata's makesymmetric(). */
	quietly drop if firmIDX == `k' & firm_n != 1
	quietly replace firmIDXX = . if firmIDX == `k'
	drop theauctions
	* Checkpoint the partial result every 50 rounds
	if mod(`k', 50) == 0 {
		quietly compress
		save "../firmleveltest/Cluster/freq_matrix.dta", replace
	}
}
/* Keep one row per firm, put the columns in numeric order, and save */
drop if firm_n != 1
order col*, seq
compress
order firm*
sort firmIDX
drop firmIDXX firm_n
quietly save "../firmleveltest/Cluster/freq_matrix.dta", replace

clear mata
use "../firmleveltest/Cluster/freq_matrix.dta", clear
order firm*
order col*, seq
sort firmIDX
/* divide = row maximum over the count columns; rows with no count at all are dropped */
egen divide = rowmax(col*)
order divide
drop if divide == .
*rename col* col*a
*putmata X = (firmID firmIDX)

/* Turn the lower-triangular count matrix into a symmetric matrix */
	putmata X = (col*)
	/* Preview the top-left corner as a sanity check. The preview size is capped
	   at the matrix dimension: the former fixed 1..10 subscript aborted the
	   do-file whenever fewer than 10 firms were present. */
	mata: k = min((10, rows(X)))
	mata: X[1..k, 1..k]
	drop col*

	mata: Y = makesymmetric(X);
	mata: Y[1..k, 1..k]
	/* Bring the symmetric matrix back as variables c1, c2, ... */
	getmata (c*) = Y
compress
save "../firmleveltest/Cluster/freq_matrix_all.dta", replace

use "../firmleveltest/Cluster/freq_matrix_all.dta", clear
/* bids = row maximum of the c* columns, used as the firm's number of bids */
egen bids = rowmax(c*)

/* Pass 1: scale every count by the row firm's bids. The firm's own entry
   becomes 1, firms it never met are 0, and the value approaches 1 the more
   often the two firms met. */
foreach col of varlist c* {
	display "$$ column `col' $$"
	quietly replace `col' = `col'/bids
}

/* Pass 2: flip the share into a dissimilarity (0 = always together) */
foreach col of varlist c* {
	display "$$ column `col' $$"
	quietly replace `col' = 1 -`col'
}
keep c*
save "../firmleveltest/Cluster/dens_matrix_all.dta", replace


/* Load the row-normalised dissimilarities, symmetrise them as D + D', and run
   average-linkage clustering on the resulting matrix. */
use "../firmleveltest/Cluster/dens_matrix_all.dta", clear
set matsize 8000
cap drop compl*
mat drop _all
mkmat c*, mat(D)
matrix symD= D+D'

/* Row i of the matrix belongs to firmIDX = i; attach one row of firm
   attributes per firm from the saved map. */
gen firmIDX = _n
merge 1:m firmIDX using "../firmleveltest/Cluster/firmIDX_map.dta"
drop if _merge == 2
keep if firm_n == 1
/* -clustermat ..., add- writes its results back by observation order, so the
   observations must line up with the rows of symD. The order after -merge-
   is not enforced anywhere, so sort explicitly and verify the alignment. */
isid firmIDX, sort
assert firmIDX == _n
assert _N == rowsof(symD)
clustermat averagelink symD, name(aa) add force

/* Generate the group variables g<k> for k = m..gmax, where m is the smallest
   number of groups, searching upward from gmin, for which -cluster generate-
   succeeds.
   Changes from the earlier version:
     - m is incremented BEFORE each retry, so after the loop it really is the
       group count that succeeded (it used to end one higher);
     - the retry uses the same upper bound as the first attempt and as the
       -drop- below (it used to request up to 60, leaving g51-g60 behind);
     - the search stops with an error once m reaches gmax instead of looping
       forever;
     - the -drop- is skipped when fewer than 15 group variables exist. */
	local gmin = 20
	local gmax = 50
	local m = `gmin'
	cap cluster gen g = groups(`m'/`gmax')
	local rc = _rc
	while `rc' ~= 0 & `m' < `gmax' {
		local m = `m' + 1
		di "try assuming no. groups = `m'"
		cap noisily cluster gen g = groups(`m'/`gmax')
		local rc = _rc
		di `rc'
		*sleep 1000
	}
	if `rc' ~= 0 {
		di as err "cluster generate failed for every range `gmin'..`gmax'/`gmax'"
		exit `rc'
	}
	/* local m now holds the smallest feasible number of groups */
/* Keep the 15 group variables g<m> .. g<m+14>; l is the first one dropped */
local l = min(`m' + 15, `gmax' + 1)
if `l' <= `gmax' {
	drop g`l' - g`gmax'
}
*cluster dendrogram aa, cutn(`m') xlabel(, angle(90) labsize(*.65))
*tab nm_firm if g`m' == 1
sum g*

/* For every retained number of clusters k (g<m> .. g<l-1>):
     a              = for firm j, the mean of column c<j> over the rows in
                      firm j's own group;
     gw_distance<k> = the group mean of a. */
local last_k = `l' - 1
forvalues k = `m'/`last_k' {
	quietly summarize c1
	display r(N)
	local n_cols = r(N)
	capture drop a
	quietly generate a = .
	quietly sort g`k'
	forvalues j = 1/`n_cols' {
		display "No. clusters: `k'; Loop: `j'/`n_cols'"
		tempvar grp_mean
		capture drop tmp
		sort g`k'
		* Group mean of column j, stored only on firm j's own row
		quietly by g`k': egen `grp_mean' = mean(c`j')
		quietly replace a = `grp_mean' if firmIDX == `j'
		drop `grp_mean'
	}
	bysort g`k': egen gw_distance`k' = mean(a)
	drop a
	sort g`m'
}

/* Group size and mean within-group distance at the smallest cluster count */
tabstat gw_distance`m', by(g`m') statistics(n mean)

/* Re-attach the ring list by region x firmID; the merge indicator is not kept */
drop _merge
merge m:1 region firmID using "../data/ring_list020119.dta", nogenerate
drop firm_n

/* ad_firm and region are dropped so that the merge below brings them back
   from the firmIDX_map.dta file saved earlier */
drop ad_firm region

merge 1:m regionfirmID using "../firmleveltest/Cluster/firmIDX_map.dta"
/* Keep one row per firm, then discard rows found only in the map */
drop if firm_n != 1
summarize firmID tsrank
drop if _merge == 2
drop _merge
save "../firmleveltest/ringfirms_plus.dta", replace
